In [97]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
# import seaborn as sns 
import datetime
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Scatter, Figure, Layout
import plotly
import plotly.graph_objs as go
import plotly.express as px
from IPython.display import Markdown as md
init_notebook_mode(connected=False)
import io
import requests
import re

COVID-19 in Italy. Visuals


(alternatively, see results and code together here)

 


Data source: this GitHub page

Authors and sources mentioned: Editore/Autore del dataset: Dipartimento della Protezione Civile. Categoria ISO 19115: Salute. Dati forniti dal Ministero della Salute.

Regional data files (Dati per Regione):
  • Struttura file giornaliero: dpc-covid19-ita-regioni-yyyymmdd.csv (dpc-covid19-ita-regioni-20200224.csv)
  • File complessivo: dpc-covid19-ita-regioni.csv
  • File ultimi dati (latest): dpc-covid19-ita-regioni-latest.csv

 

In [98]:
# Scrape the Italian Wikipedia article on regions; one of its tables holds the population figures.
URL='https://it.wikipedia.org/wiki/Regione_(Italia)'
res=requests.get(URL)
# Stop here on an HTTP error instead of handing an error page to the HTML parser.
res.raise_for_status()
# read_html expects a file-like object; passing the markup as a bare string is deprecated in recent pandas.
tables=pd.read_html(io.StringIO(res.text))
# NOTE(review): the positional index 13 depends on the page layout when this was written —
# confirm it still selects the table with the 'Regione' and 'Popolazione (ab.)' columns.
dt = tables[13]
In [99]:
def dewhite(x):
    """Return the digits of ``x`` concatenated into a single string.

    Every character that is not a digit (spaces, separators, letters) is
    dropped; the remaining digit runs are joined in their original order.

    Parameters
    ----------
    x : str
        Text to strip.

    Returns
    -------
    str
        The digits of ``x``, or an empty string when ``x`` has none.
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    return ''.join(re.findall(r'\d+', x))

# Keep only the region name and population columns, under English names.
dt2 = dt[['Regione','Popolazione (ab.)']].copy()
dt2.columns = ['Region','Pop']

# The population values arrive as text (presumably with thousands separators — verify against the page);
# keep only the digit runs and convert to int.
# Raw string: '\d' in a plain literal is an invalid escape sequence.
dt2['Pop'] = dt2['Pop'].apply(lambda x: ''.join(re.findall(r'\d+', x))).astype(int)
In [100]:
# Download the cumulative per-region CSV from the pcm-dpc/COVID-19 GitHub repository.
resp = requests.get("https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv")
# Fail on an HTTP error rather than parsing an error body as CSV.
resp.raise_for_status()
s = resp.content
dat = pd.read_csv(io.StringIO(s.decode('utf-8')))
# Largest value of the 'data' column; shown below as the "as of" date.
cdate = dat['data'].max()

md("Currently data as of date: {}".format(cdate))
Out[100]:

Currently data as of date: 2020-10-13T17:00:00


 

What's in the original dataframe?

In [101]:
# Render the raw frame's column labels as a Markdown line.
md(f"All column names: {dat.columns.tolist()}")
Out[101]:

All column names: ['data', 'stato', 'codice_regione', 'denominazione_regione', 'lat', 'long', 'ricoverati_con_sintomi', 'terapia_intensiva', 'totale_ospedalizzati', 'isolamento_domiciliare', 'totale_positivi', 'variazione_totale_positivi', 'nuovi_positivi', 'dimessi_guariti', 'deceduti', 'casi_da_sospetto_diagnostico', 'casi_da_screening', 'totale_casi', 'tamponi', 'casi_testati', 'note']

 

Last 5 rows of the raw data (all from the most recent date)

In [102]:
# Original (Italian) column name -> English name used in the rest of the notebook.
# An explicit mapping, unlike assigning a positional list to df.columns, keeps working
# when the upstream CSV gains or reorders columns; unmapped columns keep their original name.
column_names = {
    'data': 'Date',
    'denominazione_regione': 'Region',
    'lat': 'Lat',
    'long': 'Long',
    'ricoverati_con_sintomi': 'HospWithSymptoms',
    'terapia_intensiva': 'IC',
    'totale_ospedalizzati': 'HospTotal',
    'isolamento_domiciliare': 'AtHome',
    'totale_positivi': 'CurrentlyPositive',
    'variazione_totale_positivi': 'VariationOfPositives',
    'nuovi_positivi': 'NewPositives',
    'dimessi_guariti': 'Recovered',
    'deceduti': 'Deaths',
    'casi_da_sospetto_diagnostico': 'Diagnostico',
    'casi_da_screening': 'Screening',
    'totale_casi': 'TotalCases',
    'tamponi': 'NoOfTests',
}
df = dat.drop(['stato','codice_regione'], axis=1).rename(columns=column_names)

# Attach each region's population. This is an inner join on the region name, so rows whose
# 'Region' has no exact match in dt2 are dropped.
df = pd.merge(df, dt2, on='Region')

# 'Date' column: calendar dates without the time of day.
df['Date'] = pd.to_datetime(df['Date']).dt.date
# Index: the same dates as a DatetimeIndex, so the frame can be filtered with date strings.
df = df.set_index(df["Date"])
df.index = pd.to_datetime(df.index)

# NOTE(review): negative daily counts (presumably downward corrections in the source data)
# are turned into positive values here — confirm this is intended.
df['NewPositives'] = np.abs(df['NewPositives'])

# Show the last rows of the raw (untranslated) frame.
dat.tail(5)
Out[102]:
data stato codice_regione denominazione_regione lat long ricoverati_con_sintomi terapia_intensiva totale_ospedalizzati isolamento_domiciliare ... variazione_totale_positivi nuovi_positivi dimessi_guariti deceduti casi_da_sospetto_diagnostico casi_da_screening totale_casi tamponi casi_testati note
4888 2020-10-13T17:00:00 ITA 19 Sicilia 38.115697 13.362357 426 44 470 4407 ... 195 334 4708 341 6366.0 3560.0 9926 561166 402751.0 NaN
4889 2020-10-13T17:00:00 ITA 9 Toscana 43.769231 11.255889 218 44 262 6655 ... 413 480 11008 1181 14546.0 4560.0 19106 856638 576101.0 NaN
4890 2020-10-13T17:00:00 ITA 10 Umbria 43.106758 12.388247 68 11 79 1412 ... 121 136 2027 90 1855.0 1753.0 3608 236287 140125.0 NaN
4891 2020-10-13T17:00:00 ITA 2 Valle d'Aosta 45.737503 7.320149 11 3 14 244 ... 33 33 1125 146 1383.0 146.0 1529 31763 21861.0 NaN
4892 2020-10-13T17:00:00 ITA 5 Veneto 45.434905 12.338452 272 33 305 6350 ... 366 485 23435 2226 22334.0 9982.0 32316 2074007 810366.0 NaN

5 rows × 21 columns


 

Variable names translated into English, with explanations

  • HospWithSymptoms : Currently hospitalized patients with symptoms
  • IC : Intensive care
  • HospTotal: Total number of currently hospitalized patients
  • AtHome : Currently at home confinement
  • CurrentlyPositive : Total amount of current positive cases (Hospitalised patients + Home confinement)
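  • NewPositives : Daily new positive cases (total amount of positive cases - total amount of positive cases of the previous day)
  • VariationOfPositives : Daily change in current positive cases (total amount of current positive cases - total amount of current positive cases of the previous day)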
  • TotalCases : Total amount of positive cases
  • NoOfTests : Tests performed
In [103]:
# Preview the closing rows of the renamed, population-merged frame.
df.tail(5)
Out[103]:
Date Region Lat Long HospWithSymptoms IC HospTotal AtHome CurrentlyPositive VariationOfPositives NewPositives Recovered Deaths Diagnostico Screening TotalCases NoOfTests casi_testati note Pop
Date
2020-10-09 2020-10-09 Veneto 45.434905 12.338452 208 21 229 5245 5474 391 595 22821 2209 21936.0 8568.0 30504 2037471 796159.0 NaN 4905854
2020-10-10 2020-10-10 Veneto 45.434905 12.338452 231 20 251 5581 5832 358 561 23017 2216 22136.0 8929.0 31065 2052467 801647.0 NaN 4905854
2020-10-11 2020-10-11 Veneto 45.434905 12.338452 238 24 262 5826 6088 256 438 23197 2218 22196.0 9307.0 31503 2063607 806051.0 NaN 4905854
2020-10-12 2020-10-12 Veneto 45.434905 12.338452 244 29 273 6016 6289 201 328 23323 2219 22250.0 9581.0 31831 2069955 808554.0 NaN 4905854
2020-10-13 2020-10-13 Veneto 45.434905 12.338452 272 33 305 6350 6655 366 485 23435 2226 22334.0 9982.0 32316 2074007 810366.0 NaN 4905854

 

(Double-click a legend entry to show only that region; single-click entries to add or remove regions from the graph.)

In [104]:
# df2 is a second name for df, not a copy: columns added to df2 also appear on df.
df2 = df

# One line per region, daily new cases on a linear axis.
fig = px.line(
    df2,
    x=df2.index,
    y="NewPositives",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
)
fig.update_layout(title="Daily new cases, absolute numbers")
fig.show()
In [105]:
# 7-day moving average of daily new cases, computed separately for each region.
# A plain .rolling() over the whole frame lets a window span rows that belong to
# different regions (which rows depends on the frame's row order), so the window
# is applied inside each Region group instead.
df2['MovAv7'] = (
    df2.groupby('Region')['NewPositives']
       .transform(lambda s: s.rolling(window=7).mean())
)

# Plot only the dates after 1 March 2020; the mask is built once and used for both frame and x values.
after_march = df2.index>'2020-3-1'
fig = px.line(df2[after_march], x=df2.index[after_march], y="MovAv7", color="Region", hover_name="Region",
        render_mode="svg", log_y=False)
fig.update_layout(title="1-week rolling average of daily new cases")
fig.show()
In [106]:
# Daily new cases per million inhabitants of the region.
df2['NewPos_pc'] = df2['NewPositives']/df2['Pop']*1000_000

# Smooth with a 7-day moving average taken within each region; an ungrouped .rolling()
# would let a window span rows that belong to different regions.
df2['NewPos_pc'] = (
    df2.groupby('Region')['NewPos_pc']
       .transform(lambda s: s.rolling(window=7).mean())
)

# Plot only the dates after 1 March 2020.
after_march = df2.index>'2020-3-1'
fig = px.line(df2[after_march], x=df2.index[after_march], y="NewPos_pc", color="Region",
              hover_name="Region", log_y=False)
fig.update_layout(title="1-week rolling average of daily new cases, per million")
fig.show()
In [107]:
# Intensive-care patients scaled to one million inhabitants of the region.
per_million = 1_000_000
df2['IC_pc'] = df2['IC'] / df2['Pop'] * per_million

fig = px.line(
    df2,
    x="Date",
    y="IC_pc",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
)
fig.update_layout(title="Current intensive care patients, per million")
fig.show()
In [108]:
# Hospitalized patients scaled to one million inhabitants of the region.
per_million = 1_000_000
df2['Hosp_pc'] = df2['HospTotal'] / df2['Pop'] * per_million

fig = px.line(
    df2,
    x="Date",
    y="Hosp_pc",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
)
fig.update_layout(title="Current hospitalized, per million")
fig.show()
In [109]:
# Work on a copy so the extra column does not land on df2.
df3 = df2.copy()

# Daily deaths per region: each row minus the previous row of the same region
# (the first row of every region has no predecessor and becomes NaN).
df3['NewDeaths'] = df3.groupby('Region')['Deaths'].diff()

fig = px.bar(
    df3,
    x=df3['Date'],
    y="NewDeaths",
    color="Region",
    hover_name="Date",
)
fig.update_layout(title="Daily number of new deaths, absolute numbers")
fig.show()
In [110]:
# Tests performed per day and region: each cumulative count minus the previous row of the same region.
df2['NewNoOfTests'] = df2['NoOfTests'] - df2.groupby(['Region'])['NoOfTests'].transform('shift')

# Share of positives among the day's tests, in percent. Days whose test count is zero or
# negative are masked to NaN so they yield a gap in the line, not inf or a negative percentage
# (neither can be drawn on the logarithmic axis used below).
daily_tests = df2['NewNoOfTests'].where(df2['NewNoOfTests'] > 0)
df2['New_per_test'] = df2['NewPositives']/daily_tests*100

# Restrict the plot to five regions; the subset is built once and reused for the x values.
northern = df2[df2['Region'].isin(['Lombardia','Veneto','Emilia-Romagna','Piemonte','Liguria'])]
fig = px.line(northern, x=northern.index, y="New_per_test", color="Region", hover_name="Region",
        render_mode="svg", log_y=True, line_shape='spline')
fig.update_layout(title="New positive cases in daily tests in Northern regions, %")
fig.show()
In [111]:
# Cumulative deaths scaled to one million inhabitants of the region.
per_million = 1_000_000
df2['Deaths_per_mio'] = (df2['Deaths'] / df2['Pop']) * per_million

fig = px.line(
    df2,
    x="Date",
    y="Deaths_per_mio",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    line_shape='spline',
)
fig.update_layout(title="Cumulative number of deaths, per million")
fig.show()
In [112]:
# Daily change in currently positive cases, per million inhabitants of the region.
df2['Change_per_mio'] = df2['VariationOfPositives']/df2['Pop']*1000_000

# Smooth with a 7-day moving average taken within each region; an ungrouped .rolling()
# would let a window span rows that belong to different regions.
df2['Change_per_mio'] = (
    df2.groupby('Region')['Change_per_mio']
       .transform(lambda s: s.rolling(window=7).mean())
)

# Plot dates after 1 March 2020 and leave out Valle d'Aosta, as the title states.
shown = (df2.index>'2020-3-1') & (df2['Region']!="Valle d'Aosta")
fig = px.line(df2[shown], x='Date', y="Change_per_mio", color="Region", hover_name="Date")
fig.update_layout(title="1-week rolling average of daily change in positive cases, per million (excl. Valle d'Aosta)")
fig.show()

 

Italy as a whole

Data from all regions aggregated

In [113]:
# df2 is a second name for df, not a copy.
df2 = df

# Sum the numeric columns over all regions for each date. numeric_only=True keeps the text
# columns and the date objects of the 'Date' column out of the aggregation; summing those
# raises on current pandas. reset_index() turns the grouping dates back into a 'Date' column.
df_sum = (
    df2.drop(['Lat','Long'], axis=1)
       .groupby(df2['Date'])
       .sum(numeric_only=True)
       .reset_index()
)

# Long format: one row per (date, variable) so each variable becomes its own line.
df_sum2 = pd.melt(df_sum, id_vars=['Date'], value_vars=['NewPositives','IC','HospTotal'])

fig = px.line(df_sum2, x="Date", y="value", color='variable', hover_name="value", render_mode="svg", log_y=True,
              line_shape='spline')
fig.update_layout(title="Number of daily new positive cases, current IC patients and total hospitalized")
fig.show()
In [ ]: